home *** CD-ROM | disk | FTP | other *** search
- /* mods 871211,22 for HyperCard XFCN work -- remove <stdio> stuff and
- * go to the Mac toolbox and HC system ... ^z
- */
-
- /* file "merge_indices.c" ... by ^z -- 870823-...
- * function to merge sorted indices together repeatedly until finished
- * with them all in a single set of *.k/*.p files ...
- *
- * The merging strategy is straightforward enough:
- * Let "g" denote the generation_number and "f" denote the file_number.
- * Temporary file names begin with the letter z, which is then followed
- * by a generation number (decimal), the letter k or p (standing for
- * key or ptr, respectively), and then the file number (decimal). Thus,
- * the file "z0k0" is the keys file #0 for generation #0 (the starting,
- * pre-merging, generation), file "z2p3" is the ptr file #3 for generation
- * #2, etc.
- *
- * (The following discussion is specifically for a 2-way merge ... but
- * the generalization for N-way merging is straightforward.)
- *
- * On a given call to merge_indices, the following may happen:
- * - files zgkf/zgpf and zgk(f+1)/zgp(f+1) are merged into files
- * z(g+1)k(f/2)/z(g+1)p(f/2), and then the parent files are
- * deleted;
- * - file zgkf isn't found, which means we are done with this
- * generation and must go on to the next;
- * - file zgk(f+1) isn't found, which means that either we are
- * completely done with the merging work (if f=0) and just
- * have to rename the files zgkf/zgpf into the correct final
- * names (that is, doc_filename.k/doc_filename.p), or else
- * (if f>0) we have an odd number of files for this level
- * of merging, and therefore just have to rename zgkf/zgpf
- * to z(g+1)k(f/2)/z(g+1)p(f/2).
- */
-
- /* modified 871224 to get rid of static variables used for generation_number
- * and file_number ... instead, make the function return the upcoming
- * generation_number, and let the caller reset the file_number to zero
- * when the generation_number returned differs from the input number, or
- * increment the file_number by NMERGE when still on same generation.
- * Also, when all finished, return an illegal (negative) generation_number
- * value. ^z
- */
-
- #include "buildIndex.1.h"
-
- int merge_indices (zbufsiz, doc_file, vRef0, file_number,
- generation_number, doc_filename)
- long zbufsiz;
- int doc_file, vRef0, file_number, generation_number;
- Str255 doc_filename;
- {
- int ink[NMERGE], inp[NMERGE], outk, outp;
- long inwords, indistinctwords, outdistinctwords;
- int i, n;
- char info[128];
-
- for (n = 0; n < NMERGE; ++n)
- {
- ink[n] = open_inkfile (file_number + n, vRef0, generation_number);
- if (ink[n] == NULL)
- break;
- inp[n] = open_inpfile (file_number + n, vRef0, generation_number);
- }
-
- if (file_number + n == 1)
- {
- close_infiles (ink, inp, n);
- fix_final_file_names (doc_filename, vRef0, generation_number);
- return (-1);
- }
-
- if (n < 2)
- {
- if (n == 1)
- {
- close_infiles (ink, inp, n);
- fix_oddball_file_name (vRef0, generation_number, file_number);
- }
- strncpy (info + 1, "Beginning merge generation #", 28);
- i = 29 + putNum (info + 29, 2 + generation_number);
- info[0] = i - 1;
- give_msg (info);
- return (generation_number + 1);
- }
-
- outk = open_outkfile (vRef0, generation_number, file_number);
- outp = open_outpfile (vRef0, generation_number, file_number);
-
- inwords = 0;
- indistinctwords = 0;
- for (i = 0; i < n; ++i)
- {
- inwords += file_size (inp[i]) / sizeof(long);
- indistinctwords += file_size (ink[i]) / sizeof(KEY_REC);
- }
-
- nway_merge_kpfiles (ink, inp, outk, outp, n, zbufsiz);
-
- outdistinctwords = file_size (outk) / sizeof(KEY_REC);
-
- strncpy (info + 1, "Merge #", 7);
- i = 8 + putNum (info + 8, 1 + file_number / NMERGE);
- strncpy (info + i, ": ", 3);
- i += 3;
- i += putNum (info + i, inwords);
- strncpy (info + i, " total words, ", 14);
- i += 14;
- i += putNum (info + i, indistinctwords);
- strncpy (info + i, " distinct words in, ", 20);
- i += 20;
- i += putNum (info + i, outdistinctwords);
- strncpy (info + i, " out.", 5);
- i += 5;
- info[0] = i - 1;
- give_msg (info);
-
- close_infiles (ink, inp, n);
- FSClose (outk);
- FSClose (outp);
- remove_used_infiles (n, vRef0, generation_number, file_number);
-
- return (generation_number);
- }
-
-